{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "import csv\n",
    "import math\n",
    "import random\n",
    "from collections import defaultdict\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "from sklearn.model_selection import train_test_split \n",
    "from sklearn.metrics import accuracy_score, classification_report\n",
    "from sklearn.preprocessing import LabelEncoder\n",
    "from sklearn.naive_bayes import MultinomialNB, GaussianNB"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Ler arquivo\n",
    "df = pd.read_csv('carData.csv', header=None)\n",
    "df.columns = ['buying', 'maint','doors','persons','lug_boot','safety','class']\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>buying</th>\n",
       "      <th>maint</th>\n",
       "      <th>doors</th>\n",
       "      <th>persons</th>\n",
       "      <th>lug_boot</th>\n",
       "      <th>safety</th>\n",
       "      <th>class</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>vhigh</td>\n",
       "      <td>vhigh</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>small</td>\n",
       "      <td>low</td>\n",
       "      <td>unacc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>vhigh</td>\n",
       "      <td>vhigh</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>small</td>\n",
       "      <td>med</td>\n",
       "      <td>unacc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>vhigh</td>\n",
       "      <td>vhigh</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>small</td>\n",
       "      <td>high</td>\n",
       "      <td>unacc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>vhigh</td>\n",
       "      <td>vhigh</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>med</td>\n",
       "      <td>low</td>\n",
       "      <td>unacc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>vhigh</td>\n",
       "      <td>vhigh</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>med</td>\n",
       "      <td>med</td>\n",
       "      <td>unacc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>vhigh</td>\n",
       "      <td>vhigh</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>med</td>\n",
       "      <td>high</td>\n",
       "      <td>unacc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>vhigh</td>\n",
       "      <td>vhigh</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>big</td>\n",
       "      <td>low</td>\n",
       "      <td>unacc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>vhigh</td>\n",
       "      <td>vhigh</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>big</td>\n",
       "      <td>med</td>\n",
       "      <td>unacc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>vhigh</td>\n",
       "      <td>vhigh</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>big</td>\n",
       "      <td>high</td>\n",
       "      <td>unacc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>vhigh</td>\n",
       "      <td>vhigh</td>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>small</td>\n",
       "      <td>low</td>\n",
       "      <td>unacc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>vhigh</td>\n",
       "      <td>vhigh</td>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>small</td>\n",
       "      <td>med</td>\n",
       "      <td>unacc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>vhigh</td>\n",
       "      <td>vhigh</td>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>small</td>\n",
       "      <td>high</td>\n",
       "      <td>unacc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>vhigh</td>\n",
       "      <td>vhigh</td>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>med</td>\n",
       "      <td>low</td>\n",
       "      <td>unacc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>vhigh</td>\n",
       "      <td>vhigh</td>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>med</td>\n",
       "      <td>med</td>\n",
       "      <td>unacc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>vhigh</td>\n",
       "      <td>vhigh</td>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>med</td>\n",
       "      <td>high</td>\n",
       "      <td>unacc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>vhigh</td>\n",
       "      <td>vhigh</td>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>big</td>\n",
       "      <td>low</td>\n",
       "      <td>unacc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>vhigh</td>\n",
       "      <td>vhigh</td>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>big</td>\n",
       "      <td>med</td>\n",
       "      <td>unacc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>vhigh</td>\n",
       "      <td>vhigh</td>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>big</td>\n",
       "      <td>high</td>\n",
       "      <td>unacc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>vhigh</td>\n",
       "      <td>vhigh</td>\n",
       "      <td>2</td>\n",
       "      <td>more</td>\n",
       "      <td>small</td>\n",
       "      <td>low</td>\n",
       "      <td>unacc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>vhigh</td>\n",
       "      <td>vhigh</td>\n",
       "      <td>2</td>\n",
       "      <td>more</td>\n",
       "      <td>small</td>\n",
       "      <td>med</td>\n",
       "      <td>unacc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>vhigh</td>\n",
       "      <td>vhigh</td>\n",
       "      <td>2</td>\n",
       "      <td>more</td>\n",
       "      <td>small</td>\n",
       "      <td>high</td>\n",
       "      <td>unacc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>vhigh</td>\n",
       "      <td>vhigh</td>\n",
       "      <td>2</td>\n",
       "      <td>more</td>\n",
       "      <td>med</td>\n",
       "      <td>low</td>\n",
       "      <td>unacc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>vhigh</td>\n",
       "      <td>vhigh</td>\n",
       "      <td>2</td>\n",
       "      <td>more</td>\n",
       "      <td>med</td>\n",
       "      <td>med</td>\n",
       "      <td>unacc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>vhigh</td>\n",
       "      <td>vhigh</td>\n",
       "      <td>2</td>\n",
       "      <td>more</td>\n",
       "      <td>med</td>\n",
       "      <td>high</td>\n",
       "      <td>unacc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>vhigh</td>\n",
       "      <td>vhigh</td>\n",
       "      <td>2</td>\n",
       "      <td>more</td>\n",
       "      <td>big</td>\n",
       "      <td>low</td>\n",
       "      <td>unacc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>vhigh</td>\n",
       "      <td>vhigh</td>\n",
       "      <td>2</td>\n",
       "      <td>more</td>\n",
       "      <td>big</td>\n",
       "      <td>med</td>\n",
       "      <td>unacc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>vhigh</td>\n",
       "      <td>vhigh</td>\n",
       "      <td>2</td>\n",
       "      <td>more</td>\n",
       "      <td>big</td>\n",
       "      <td>high</td>\n",
       "      <td>unacc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>vhigh</td>\n",
       "      <td>vhigh</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>small</td>\n",
       "      <td>low</td>\n",
       "      <td>unacc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>vhigh</td>\n",
       "      <td>vhigh</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>small</td>\n",
       "      <td>med</td>\n",
       "      <td>unacc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>vhigh</td>\n",
       "      <td>vhigh</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>small</td>\n",
       "      <td>high</td>\n",
       "      <td>unacc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1698</th>\n",
       "      <td>low</td>\n",
       "      <td>low</td>\n",
       "      <td>4</td>\n",
       "      <td>more</td>\n",
       "      <td>big</td>\n",
       "      <td>low</td>\n",
       "      <td>unacc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1699</th>\n",
       "      <td>low</td>\n",
       "      <td>low</td>\n",
       "      <td>4</td>\n",
       "      <td>more</td>\n",
       "      <td>big</td>\n",
       "      <td>med</td>\n",
       "      <td>good</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1700</th>\n",
       "      <td>low</td>\n",
       "      <td>low</td>\n",
       "      <td>4</td>\n",
       "      <td>more</td>\n",
       "      <td>big</td>\n",
       "      <td>high</td>\n",
       "      <td>vgood</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1701</th>\n",
       "      <td>low</td>\n",
       "      <td>low</td>\n",
       "      <td>5more</td>\n",
       "      <td>2</td>\n",
       "      <td>small</td>\n",
       "      <td>low</td>\n",
       "      <td>unacc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1702</th>\n",
       "      <td>low</td>\n",
       "      <td>low</td>\n",
       "      <td>5more</td>\n",
       "      <td>2</td>\n",
       "      <td>small</td>\n",
       "      <td>med</td>\n",
       "      <td>unacc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1703</th>\n",
       "      <td>low</td>\n",
       "      <td>low</td>\n",
       "      <td>5more</td>\n",
       "      <td>2</td>\n",
       "      <td>small</td>\n",
       "      <td>high</td>\n",
       "      <td>unacc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1704</th>\n",
       "      <td>low</td>\n",
       "      <td>low</td>\n",
       "      <td>5more</td>\n",
       "      <td>2</td>\n",
       "      <td>med</td>\n",
       "      <td>low</td>\n",
       "      <td>unacc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1705</th>\n",
       "      <td>low</td>\n",
       "      <td>low</td>\n",
       "      <td>5more</td>\n",
       "      <td>2</td>\n",
       "      <td>med</td>\n",
       "      <td>med</td>\n",
       "      <td>unacc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1706</th>\n",
       "      <td>low</td>\n",
       "      <td>low</td>\n",
       "      <td>5more</td>\n",
       "      <td>2</td>\n",
       "      <td>med</td>\n",
       "      <td>high</td>\n",
       "      <td>unacc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1707</th>\n",
       "      <td>low</td>\n",
       "      <td>low</td>\n",
       "      <td>5more</td>\n",
       "      <td>2</td>\n",
       "      <td>big</td>\n",
       "      <td>low</td>\n",
       "      <td>unacc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1708</th>\n",
       "      <td>low</td>\n",
       "      <td>low</td>\n",
       "      <td>5more</td>\n",
       "      <td>2</td>\n",
       "      <td>big</td>\n",
       "      <td>med</td>\n",
       "      <td>unacc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1709</th>\n",
       "      <td>low</td>\n",
       "      <td>low</td>\n",
       "      <td>5more</td>\n",
       "      <td>2</td>\n",
       "      <td>big</td>\n",
       "      <td>high</td>\n",
       "      <td>unacc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1710</th>\n",
       "      <td>low</td>\n",
       "      <td>low</td>\n",
       "      <td>5more</td>\n",
       "      <td>4</td>\n",
       "      <td>small</td>\n",
       "      <td>low</td>\n",
       "      <td>unacc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1711</th>\n",
       "      <td>low</td>\n",
       "      <td>low</td>\n",
       "      <td>5more</td>\n",
       "      <td>4</td>\n",
       "      <td>small</td>\n",
       "      <td>med</td>\n",
       "      <td>acc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1712</th>\n",
       "      <td>low</td>\n",
       "      <td>low</td>\n",
       "      <td>5more</td>\n",
       "      <td>4</td>\n",
       "      <td>small</td>\n",
       "      <td>high</td>\n",
       "      <td>good</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1713</th>\n",
       "      <td>low</td>\n",
       "      <td>low</td>\n",
       "      <td>5more</td>\n",
       "      <td>4</td>\n",
       "      <td>med</td>\n",
       "      <td>low</td>\n",
       "      <td>unacc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1714</th>\n",
       "      <td>low</td>\n",
       "      <td>low</td>\n",
       "      <td>5more</td>\n",
       "      <td>4</td>\n",
       "      <td>med</td>\n",
       "      <td>med</td>\n",
       "      <td>good</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1715</th>\n",
       "      <td>low</td>\n",
       "      <td>low</td>\n",
       "      <td>5more</td>\n",
       "      <td>4</td>\n",
       "      <td>med</td>\n",
       "      <td>high</td>\n",
       "      <td>vgood</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1716</th>\n",
       "      <td>low</td>\n",
       "      <td>low</td>\n",
       "      <td>5more</td>\n",
       "      <td>4</td>\n",
       "      <td>big</td>\n",
       "      <td>low</td>\n",
       "      <td>unacc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1717</th>\n",
       "      <td>low</td>\n",
       "      <td>low</td>\n",
       "      <td>5more</td>\n",
       "      <td>4</td>\n",
       "      <td>big</td>\n",
       "      <td>med</td>\n",
       "      <td>good</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1718</th>\n",
       "      <td>low</td>\n",
       "      <td>low</td>\n",
       "      <td>5more</td>\n",
       "      <td>4</td>\n",
       "      <td>big</td>\n",
       "      <td>high</td>\n",
       "      <td>vgood</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1719</th>\n",
       "      <td>low</td>\n",
       "      <td>low</td>\n",
       "      <td>5more</td>\n",
       "      <td>more</td>\n",
       "      <td>small</td>\n",
       "      <td>low</td>\n",
       "      <td>unacc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1720</th>\n",
       "      <td>low</td>\n",
       "      <td>low</td>\n",
       "      <td>5more</td>\n",
       "      <td>more</td>\n",
       "      <td>small</td>\n",
       "      <td>med</td>\n",
       "      <td>acc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1721</th>\n",
       "      <td>low</td>\n",
       "      <td>low</td>\n",
       "      <td>5more</td>\n",
       "      <td>more</td>\n",
       "      <td>small</td>\n",
       "      <td>high</td>\n",
       "      <td>good</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1722</th>\n",
       "      <td>low</td>\n",
       "      <td>low</td>\n",
       "      <td>5more</td>\n",
       "      <td>more</td>\n",
       "      <td>med</td>\n",
       "      <td>low</td>\n",
       "      <td>unacc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1723</th>\n",
       "      <td>low</td>\n",
       "      <td>low</td>\n",
       "      <td>5more</td>\n",
       "      <td>more</td>\n",
       "      <td>med</td>\n",
       "      <td>med</td>\n",
       "      <td>good</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1724</th>\n",
       "      <td>low</td>\n",
       "      <td>low</td>\n",
       "      <td>5more</td>\n",
       "      <td>more</td>\n",
       "      <td>med</td>\n",
       "      <td>high</td>\n",
       "      <td>vgood</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1725</th>\n",
       "      <td>low</td>\n",
       "      <td>low</td>\n",
       "      <td>5more</td>\n",
       "      <td>more</td>\n",
       "      <td>big</td>\n",
       "      <td>low</td>\n",
       "      <td>unacc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1726</th>\n",
       "      <td>low</td>\n",
       "      <td>low</td>\n",
       "      <td>5more</td>\n",
       "      <td>more</td>\n",
       "      <td>big</td>\n",
       "      <td>med</td>\n",
       "      <td>good</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1727</th>\n",
       "      <td>low</td>\n",
       "      <td>low</td>\n",
       "      <td>5more</td>\n",
       "      <td>more</td>\n",
       "      <td>big</td>\n",
       "      <td>high</td>\n",
       "      <td>vgood</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1728 rows × 7 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     buying  maint  doors persons lug_boot safety  class\n",
       "0     vhigh  vhigh      2       2    small    low  unacc\n",
       "1     vhigh  vhigh      2       2    small    med  unacc\n",
       "2     vhigh  vhigh      2       2    small   high  unacc\n",
       "3     vhigh  vhigh      2       2      med    low  unacc\n",
       "4     vhigh  vhigh      2       2      med    med  unacc\n",
       "5     vhigh  vhigh      2       2      med   high  unacc\n",
       "6     vhigh  vhigh      2       2      big    low  unacc\n",
       "7     vhigh  vhigh      2       2      big    med  unacc\n",
       "8     vhigh  vhigh      2       2      big   high  unacc\n",
       "9     vhigh  vhigh      2       4    small    low  unacc\n",
       "10    vhigh  vhigh      2       4    small    med  unacc\n",
       "11    vhigh  vhigh      2       4    small   high  unacc\n",
       "12    vhigh  vhigh      2       4      med    low  unacc\n",
       "13    vhigh  vhigh      2       4      med    med  unacc\n",
       "14    vhigh  vhigh      2       4      med   high  unacc\n",
       "15    vhigh  vhigh      2       4      big    low  unacc\n",
       "16    vhigh  vhigh      2       4      big    med  unacc\n",
       "17    vhigh  vhigh      2       4      big   high  unacc\n",
       "18    vhigh  vhigh      2    more    small    low  unacc\n",
       "19    vhigh  vhigh      2    more    small    med  unacc\n",
       "20    vhigh  vhigh      2    more    small   high  unacc\n",
       "21    vhigh  vhigh      2    more      med    low  unacc\n",
       "22    vhigh  vhigh      2    more      med    med  unacc\n",
       "23    vhigh  vhigh      2    more      med   high  unacc\n",
       "24    vhigh  vhigh      2    more      big    low  unacc\n",
       "25    vhigh  vhigh      2    more      big    med  unacc\n",
       "26    vhigh  vhigh      2    more      big   high  unacc\n",
       "27    vhigh  vhigh      3       2    small    low  unacc\n",
       "28    vhigh  vhigh      3       2    small    med  unacc\n",
       "29    vhigh  vhigh      3       2    small   high  unacc\n",
       "...     ...    ...    ...     ...      ...    ...    ...\n",
       "1698    low    low      4    more      big    low  unacc\n",
       "1699    low    low      4    more      big    med   good\n",
       "1700    low    low      4    more      big   high  vgood\n",
       "1701    low    low  5more       2    small    low  unacc\n",
       "1702    low    low  5more       2    small    med  unacc\n",
       "1703    low    low  5more       2    small   high  unacc\n",
       "1704    low    low  5more       2      med    low  unacc\n",
       "1705    low    low  5more       2      med    med  unacc\n",
       "1706    low    low  5more       2      med   high  unacc\n",
       "1707    low    low  5more       2      big    low  unacc\n",
       "1708    low    low  5more       2      big    med  unacc\n",
       "1709    low    low  5more       2      big   high  unacc\n",
       "1710    low    low  5more       4    small    low  unacc\n",
       "1711    low    low  5more       4    small    med    acc\n",
       "1712    low    low  5more       4    small   high   good\n",
       "1713    low    low  5more       4      med    low  unacc\n",
       "1714    low    low  5more       4      med    med   good\n",
       "1715    low    low  5more       4      med   high  vgood\n",
       "1716    low    low  5more       4      big    low  unacc\n",
       "1717    low    low  5more       4      big    med   good\n",
       "1718    low    low  5more       4      big   high  vgood\n",
       "1719    low    low  5more    more    small    low  unacc\n",
       "1720    low    low  5more    more    small    med    acc\n",
       "1721    low    low  5more    more    small   high   good\n",
       "1722    low    low  5more    more      med    low  unacc\n",
       "1723    low    low  5more    more      med    med   good\n",
       "1724    low    low  5more    more      med   high  vgood\n",
       "1725    low    low  5more    more      big    low  unacc\n",
       "1726    low    low  5more    more      big    med   good\n",
       "1727    low    low  5more    more      big   high  vgood\n",
       "\n",
       "[1728 rows x 7 columns]"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_train, df_test = train_test_split(df,test_size=0.3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = df_train\n",
    "totalSize = len(df)\n",
    "X_test = df_test.iloc[:,0:-1]\n",
    "Y_test = df_test['class']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "for class_value in df['class'].unique():\n",
    "    class_set = df[df['class'] == class_value]\n",
    "\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "ename": "KeyError",
     "evalue": "'class'",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
      "\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n",
      "\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.Int64HashTable.get_item\u001b[0;34m()\u001b[0m\n",
      "\u001b[0;31mTypeError\u001b[0m: an integer is required",
      "\nDuring handling of the above exception, another exception occurred:\n",
      "\u001b[0;31mKeyError\u001b[0m                                  Traceback (most recent call last)",
      "\u001b[0;32m~/miniconda3/envs/juliette/lib/python3.6/site-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36mget_loc\u001b[0;34m(self, key, method, tolerance)\u001b[0m\n\u001b[1;32m   2521\u001b[0m             \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2522\u001b[0;31m                 \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   2523\u001b[0m             \u001b[0;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n",
      "\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n",
      "\u001b[0;31mKeyError\u001b[0m: 'class'",
      "\nDuring handling of the above exception, another exception occurred:\n",
      "\u001b[0;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
      "\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n",
      "\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.Int64HashTable.get_item\u001b[0;34m()\u001b[0m\n",
      "\u001b[0;31mTypeError\u001b[0m: an integer is required",
      "\nDuring handling of the above exception, another exception occurred:\n",
      "\u001b[0;31mKeyError\u001b[0m                                  Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-29-4d6ae8d8ad5d>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0;31m#Frequencia de cada atributo em cada classe\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      2\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mfrequency\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'class'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m \u001b[0;34m&\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mk\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m)\u001b[0m  \u001b[0;32mfor\u001b[0m \u001b[0mk\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0munique\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mj\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'class'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0munique\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      4\u001b[0m \u001b[0mfrequency\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/miniconda3/envs/juliette/lib/python3.6/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m   2137\u001b[0m             \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_getitem_multilevel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   2138\u001b[0m         \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2139\u001b[0;31m             \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_getitem_column\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   2140\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   2141\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0m_getitem_column\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/miniconda3/envs/juliette/lib/python3.6/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36m_getitem_column\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m   2144\u001b[0m         \u001b[0;31m# get column\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   2145\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mis_unique\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2146\u001b[0;31m             \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_item_cache\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   2147\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   2148\u001b[0m         \u001b[0;31m# duplicate columns & possible reduce dimensionality\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/miniconda3/envs/juliette/lib/python3.6/site-packages/pandas/core/generic.py\u001b[0m in \u001b[0;36m_get_item_cache\u001b[0;34m(self, item)\u001b[0m\n\u001b[1;32m   1840\u001b[0m         \u001b[0mres\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcache\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mitem\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1841\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mres\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1842\u001b[0;31m             \u001b[0mvalues\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_data\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mitem\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1843\u001b[0m             \u001b[0mres\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_box_item_values\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mitem\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalues\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1844\u001b[0m             \u001b[0mcache\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mitem\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mres\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/miniconda3/envs/juliette/lib/python3.6/site-packages/pandas/core/internals.py\u001b[0m in \u001b[0;36mget\u001b[0;34m(self, item, fastpath)\u001b[0m\n\u001b[1;32m   3836\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   3837\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0misna\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mitem\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3838\u001b[0;31m                 \u001b[0mloc\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mitem\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   3839\u001b[0m             \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   3840\u001b[0m                 \u001b[0mindexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0misna\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/miniconda3/envs/juliette/lib/python3.6/site-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36mget_loc\u001b[0;34m(self, key, method, tolerance)\u001b[0m\n\u001b[1;32m   2522\u001b[0m                 \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   2523\u001b[0m             \u001b[0;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2524\u001b[0;31m                 \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_maybe_cast_indexer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   2525\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   2526\u001b[0m         \u001b[0mindexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_indexer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmethod\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmethod\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtolerance\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtolerance\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n",
      "\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n",
      "\u001b[0;31mKeyError\u001b[0m: 'class'"
     ]
    }
   ],
   "source": [
    "#Frequencia de cada atributo em cada classe\n",
    "\n",
    "frequency = [[[len(df[df['class'] == i].index & df[df[j] == k].index)  for k in df[j].unique()] for j in df.columns[:-1]] for i in df['class'].unique()]\n",
    "frequency"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Questão 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Dataset\n",
    "df = pd.read_csv('carData.csv', header=None)\n",
    "\n",
    "#  variáveis categóricas em valores discretos\n",
    "for i in range(0, df.shape[1]):\n",
    "    df.iloc[:,i] = LabelEncoder().fit_transform(df.iloc[:,i])\n",
    "\n",
    "# conjunto treino e teste\n",
    "X_train, X_test, y_train, y_test = train_test_split(df.iloc[:,:-1], df.iloc[:,-1], test_size=0.3)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Accuracy: 0.6936416184971098%\n",
      "\n",
      "Classification Report:\n",
      "             precision    recall  f1-score   support\n",
      "\n",
      "      unacc       0.50      0.01      0.02       120\n",
      "        acc       0.00      0.00      0.00        21\n",
      "       good       0.70      1.00      0.82       360\n",
      "      vgood       0.00      0.00      0.00        18\n",
      "\n",
      "avg / total       0.60      0.69      0.57       519\n",
      "\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/juliette/miniconda3/envs/juliette/lib/python3.6/site-packages/sklearn/metrics/classification.py:1135: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples.\n",
      "  'precision', 'predicted', average, warn_for)\n"
     ]
    }
   ],
   "source": [
    "##função naive bayes do SKlearn\n",
    "clf = MultinomialNB()\n",
    "clf.fit(X_train.values, y_train.values)\n",
    "\n",
    "y_pred = clf.predict(X_test.values)\n",
    "\n",
    "\n",
    "print(\"Accuracy: {}%\".format(accuracy_score(y_true=y_test, y_pred=y_pred)))\n",
    "\n",
    "print(\"\\nClassification Report:\")\n",
    "print(classification_report(y_true=y_test, y_pred=y_pred, target_names=[\"unacc\", \"acc\", \"good\", \"vgood\"]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
